* This program creates the key dataset for endline women data that gets used for subsequent regressions.

// Root folder of the project; every dataset path below is built from this macro.
local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

clear all
set more off

// Endline women records, kept only when they match an endline household record
// on www, hh and sn.
use "`folder'/Data/endline_raw_women", clear
merge m:1 www hh sn using "`folder'/Data/endline_raw_hh", keep(match) nogenerate

// The variables listed here keep their names. Every other variable in memory gets
// an _e suffix so the baseline variables can be merged in later without name clashes.
local id_vars "district vdc vdc_code treatment cash_control info_control vdc_name www hh sn q00_001a control_vdc info_vdc cash_vdc"

quietly ds `id_vars', not
local endline_vars "`r(varlist)'"
foreach v of local endline_vars {
    rename `v' `v'_e
}

// q00_001a is carried under the name idc from here on.
rename q00_001a idc

// Park the suffixed endline data in a temporary file for the merges that follow.
tempfile women_data_panel
save "`women_data_panel'", replace

// Restrict the endline women to households that appear in the baseline household
// file (i.e. no households that are new at endline).
local hh_key "www hh district vdc vdc_code"

use "`folder'/Data/baseline_raw_hh.dta", clear
keep `hh_key'
merge 1:m `hh_key' using "`women_data_panel'", keep(match) nogenerate

// Overwrite the temporary panel file with the restricted sample.
save "`women_data_panel'", replace

// Build the baseline covariates that get attached to each endline woman.
use "`folder'/Data/baseline_raw_women.dta", clear
merge m:1 www hh using "`folder'/Data/baseline_raw_hh.dta", keep(match) nogenerate

// Discard the stored totals and recompute them from their components.
// The component lists are positional varlist ranges, so they depend on the
// column order of the raw files.
drop expend_total income_total
egen expend_total = rowtotal(expend_fuel-expend_agri_goods)
egen income_total = rowtotal(income_agri-income_prize)

// Natural logs of the totals (a total of zero yields a missing log).
generate expend_total_l = ln(expend_total)
generate income_total_l = ln(income_total)

// Variables that identify the woman / household / treatment arm. They are kept
// under their own names and double as the merge key below.
local merge_key "www hh idc district vdc vdc_code treatment cash_control info_control vdc_name control_vdc info_vdc cash_vdc"

// Baseline covariates to carry forward, assembled in pieces for readability.
local base_vars "age_years school woman_numkids hh_head_age num_hh_members hh_head_male"
local base_vars "`base_vars' reg_electricity roof separate_kitchen woman_weight woman_height rooms stories hh_land farmland"
local base_vars "`base_vars' knowledge_firstmilk knowledge_exclusivebreastfeed knowledge_6t11_fed knowledge_12t24_snacks knowledge_breastfeed_diahrrea"
local base_vars "`base_vars' knowledge_anc_checkups knowledge_iron_supp knowledge_deworming knowledge_vitaminA knowledge_pregnant_food"
local base_vars "`base_vars' youngest_3days_nonbreastmilk expend_total income_total expend_total_l income_total_l youngest_age_months curr_pregnant"

keep `merge_key' `base_vars'

// Suffix every retained baseline covariate with _b so it cannot collide with
// the _e endline variables.
foreach v of local base_vars {
    rename `v' `v'_b
}

// Attach the baseline covariates to the endline women. Endline rows without a
// baseline match are kept (using-only); baseline rows without an endline match
// are discarded. The endline file was already limited to baseline households.
merge 1:m `merge_key' using "`women_data_panel'", keep(using match)
generate has_baseline_data = (_merge == 3)
generate no_baseline_data = (_merge == 2)
drop _merge

/* Attach the wave-1 VDC file by district and VDC name.
   NOTE(review): the original heading read "Keep only those in wave 2 vdc's", but
   keep(master match) retains every row already in memory whether or not it
   matches, so this merge does not by itself restrict the sample. Confirm whether
   a restriction (e.g. keep(master)) was intended. */
merge m:1 district vdc_name using "`folder'/Data/wave1_vdcs.dta", keep(master match) nogenerate

// Drop rows whose endline first-milk knowledge item is system-missing.
drop if knowledge_firstmilk_e == .

/* Generate variables that will be used in analysis */

// Endline expenditure and income totals. rowtotal() treats missing components
// as zero, and the component lists are positional varlist ranges.
egen expend_total_e = rowtotal(expend_fuel_e-expend_agri_goods_e)
egen income_total_e = rowtotal(income_agri_e-income_prize_e)

// Natural logs of the endline totals (a zero total yields a missing log).
generate expend_total_l = ln(expend_total_e)
generate income_total_l = ln(income_total_e)

// Female age groups at baseline: <=20, 21-25, 26-30 and 31+, each labelled with
// the value assigned below (20, 25, 30, 35).
gen woman_tot_buckets_b = age_years_b
replace woman_tot_buckets_b = 20 if age_years_b <= 20
replace woman_tot_buckets_b = 25 if age_years_b >= 21 & age_years_b <= 25
replace woman_tot_buckets_b = 30 if age_years_b >= 26 & age_years_b <= 30
// Stata ranks missing values above every number, so an unguarded ">= 31" would
// put women with no baseline age into the oldest bucket. With the guard the
// bucket (and the dummies from tab, gen()) stay missing for those women.
replace woman_tot_buckets_b = 35 if age_years_b >= 31 & !missing(age_years_b)
tab woman_tot_buckets_b, gen(woman_age_b)

// Recode the value 2 to 0 on yes/no items.
// NOTE(review): presumably 1 = yes and 2 = no in the questionnaire; confirm against the codebook.
replace school_b = 0 if school_b == 2
replace youngest_3days_nonbreastmilk_b = 0 if youngest_3days_nonbreastmilk_b == 2

replace attend_co_meeting_e = 0 if attend_co_meeting_e == 2

replace reg_electricity_b = 0 if reg_electricity_b == 2
// Indicator for roof code 6 (missing roof codes yield 0).
gen stone_roof_b = roof_b == 6
replace separate_kitchen_b = 0 if separate_kitchen_b == 2

// Household-head age in decades, bottom-coded at 2 (under 20) and top-coded at 6 (70+).
gen hh_head_tot_buckets_b = floor(hh_head_age_b/10)
replace hh_head_tot_buckets_b = 2 if hh_head_age_b < 20
// Same missing-value guard as above: a missing head age must not fall into the 70+ bucket.
replace hh_head_tot_buckets_b = 6 if hh_head_age_b >= 70 & !missing(hh_head_age_b)
tab hh_head_tot_buckets_b, gen(hh_head_age_b)

// Endline yes/no items: recode 2 to 0.
foreach v of varlist youngest_tt_injection_e youngest_irontab_e youngest_vitaminA_e youngest_3days_nonbreastmilk_e {
    replace `v' = 0 if `v' == 2
}

// Knowledge indicators and the simple additive knowledge index, built once for
// the baseline (_b) and once for the endline (_e) answers. Every indicator is
// 1 when the answer equals (or falls in) the coded value(s) tested below and 0
// otherwise; a missing answer therefore scores 0.
foreach w in b e {
    generate exclusive_6mo_`w' = (knowledge_exclusivebreastfeed_`w' == 2)

    generate preg_more_`w' = (knowledge_pregnant_food_`w' == 1)

    generate diarrhea_more_`w' = (knowledge_breastfeed_diahrrea_`w' == 3)

    generate givebaby_firstmilk_`w' = (knowledge_firstmilk_`w' == 1)

    generate feed_6t11_`w' = inrange(knowledge_6t11_fed_`w', 2, 4)

    generate snacks_12t24_`w' = inrange(knowledge_12t24_snacks_`w', 4, 6)

    generate iron_3t4_`w' = inlist(knowledge_iron_supp_`w', 3, 4)

    generate deworming_2ndtri_`w' = inrange(knowledge_deworming_`w', 4, 7)

    generate vitaminA_45_`w' = (knowledge_vitaminA_`w' <= 45)

    generate anc_checkups_4_`w' = (knowledge_anc_checkups_`w' == 4)

    // The baseline index carries a _b suffix; the endline index is unsuffixed.
    local index_name "knowledge_index"
    if "`w'" == "b" {
        local index_name "knowledge_index_b"
    }

    // Count of the ten indicators that equal 1.
    generate `index_name' = givebaby_firstmilk_`w' + exclusive_6mo_`w' + feed_6t11_`w' + snacks_12t24_`w' + diarrhea_more_`w' + iron_3t4_`w' + deworming_2ndtri_`w' + vitaminA_45_`w' + preg_more_`w' + anc_checkups_4_`w'
}

// Behaviour indicators for the youngest child (endline).
// Exclusive breastfeeding: reported value 6, or code 99 together with a child aged 6 months or less.
generate exclusive_breastfed_6mos = (youngest_breastfed_exclusive_e == 6) | (youngest_breastfed_exclusive_e == 99 & youngest_age_months_e <= 6)
generate first_breastfed_0hr = (youngest_breastfed_first_e == 0)
// Iron tablet count between 150 and 210 inclusive (missing counts give 0).
generate youngest_irontab_5t6mos = inrange(youngest_irontab_num_e, 150, 210)


// One indicator per food the respondent mentioned as good for children, then
// the number of foods mentioned.
foreach food in porridge_ue porridge_e vegetables fruits eggs milk meat lentils {
    generate mention_kid_`food' = (goodfoods_kid_`food'_e == 1)
}
egen mention_kids_items = rowtotal(mention_kid*)


/* Food Groups */
// Each group indicator is 1 when any of its 24-hour recall items equals 1.
local grp_dairy    "formula animalmilk yogurt"
local grp_grain    "fortified porridge rice_grains potatoes"
local grp_vitA_veg "yo_veg yo_fruits leafy_veg"
local grp_oth_veg  "oth_fruit"
local grp_eggs     "eggs"
local grp_meat     "meat organ_meat fish snails"
local grp_nuts     "lentils"

// The groups are created in this order so that the dairy-nuts varlist range
// used below covers exactly these seven variables.
foreach g in dairy grain vitA_veg oth_veg eggs meat nuts {
    generate `g' = 0
    foreach item of local grp_`g' {
        replace `g' = 1 if youngest_past24hrs_`item'_e == 1
    }
}

// Number of food groups consumed; blanked out unless youngest_over24months_e
// equals 1, and also when the exclusive-breastfeeding item is coded 99.
egen foodgroups = rowtotal(dairy-nuts)
replace foodgroups = . if youngest_over24months_e != 1 | youngest_breastfed_exclusive_e == 99


// New fertility: more children at endline than at baseline with a youngest child
// aged 15 months or less, OR currently pregnant (code 1). This is evaluated
// before curr_pregnant_e is recoded on the next line.
generate fertility_new = (woman_numkids_e > woman_numkids_b & youngest_age_months_e <= 15) | (curr_pregnant_e == 1)
replace curr_pregnant_e = 0 if curr_pregnant_e == 2

// Establish if the woman had a new child: 1 when she has more children than at
// baseline and the youngest is 13 months or less; left missing otherwise.
generate newkids = 1 if woman_numkids_e > woman_numkids_b & youngest_age_months_e <= 13

// Indicators for each item being ranked 1.
generate values_health   = (health_ranking_e == 1)
generate values_food     = (food_ranking_e == 1)
generate values_educ     = (education_ranking_e == 1)
generate values_security = (security_ranking_e == 1)

// Expected child education coded 11 through 15.
generate educ_expect_slc = inrange(child_educ_expect_e, 11, 15)

// Expected child occupation, by code.
generate occ_expect_ag  = (child_occ_expect_e == 1)
generate occ_expect_ind = (child_occ_expect_e == 2)
generate occ_expect_svc = (child_occ_expect_e == 4)

/* Drop households that were dropped at baseline */
merge m:1 www hh using "`folder'/Data/baseline_flag_hh.dta", nogen keep(3)
drop if flag_spending == 1 | flag_missingwomen == 1
// Remove the baseline flag variables so the endline flags below start clean.
drop flag*
svyset vdc_code, strata(district)

// Baseline weight above 99. Created AFTER the wildcard drop above: when it was
// created before, "drop flag*" deleted it immediately. Missing weights are not
// flagged (Stata ranks missing above every number, so the guard is required).
gen flag_women_baselineweight = woman_weight_b > 99 & !missing(woman_weight_b)

/* Establish outliers with the flag variable */

/* Replace the flag on HH Assets = 1 if:
   10 or more rooms used by household
   House with 4 or more stories
   Enormous values of Household Land/Farmland
   Missing values are never flagged.
   */

gen flag_hh_assets = 0
replace flag_hh_assets = 1 if rooms_e >= 10 & !missing(rooms_e)
replace flag_hh_assets = 1 if stories_e >= 4 & !missing(stories_e)
replace flag_hh_assets = 1 if hh_land_e > 10 & !missing(hh_land_e)
replace flag_hh_assets = 1 if farmland_e > 80 & !missing(farmland_e)

/* Replace the flag on HH Spending = 1 if:
   Monthly spending > Annual Income
   Monthly Spending over 50000
   Monthly Spending = 0
   Annual Income > 500000 Rs
   (both totals come from egen rowtotal(), which never returns missing)
   */

gen flag_spending = 0
replace flag_spending = 1 if expend_total_e > income_total_e
replace flag_spending = 1 if expend_total_e > 50000
replace flag_spending = 1 if expend_total_e == 0
replace flag_spending = 1 if income_total_e > 500000

/* Replace the flag on Calories = 1 if:
   Calories per person exceed 5000 (missing calories are not flagged) */
gen flag_cals = 0
replace flag_cals = 1 if cals_perperson_e > 5000 & !missing(cals_perperson_e)

// Overall outlier flag: any of the three endline flags is set.
gen flag = flag_hh_assets == 1 | flag_spending == 1 | flag_cals == 1

/* Create indexes as in Anderson 2008: Knowledge */

// Step 1: standardise each endline knowledge indicator using the mean and
// standard deviation of unflagged control-VDC observations. The egen max()
// calls spread the control-group statistic to every row.
local know_items "exclusive_6mo preg_more diarrhea_more givebaby_firstmilk feed_6t11 snacks_12t24 iron_3t4 deworming_2ndtri vitaminA_45 anc_checkups_4"
local n_items : word count `know_items'

local know_z ""
foreach item of local know_items {
    egen `item'_m_c = mean(`item'_e) if control_vdc == 1 & flag == 0
    egen `item'_sd_c = sd(`item'_e) if control_vdc == 1 & flag == 0
    egen `item'_sd = max(`item'_sd_c)
    egen `item'_m = max(`item'_m_c)
    generate `item'_z = (`item'_e - `item'_m) / `item'_sd
    local know_z "`know_z' `item'_z"
}

// Step 2: covariance matrix of the z-scores, its inverse, and the scalar
// normaliser 1 / (1' * inv(S) * 1).
corrmat `know_z', covmat(know_cov)
matrix ones = J(`n_items', 1, 1)
matrix weights = inv(ones' * inv(know_cov) * ones)
local weight = weights[1,1]
matrix know_cov_inv = inv(know_cov)

// Step 3: the weight on item j is the sum of column j of the inverse covariance
// matrix. The weighted sum is assembled term by term in item order.
local index_expr ""
forvalues col = 1/`n_items' {
    local col_sum "know_cov_inv[1,`col']"
    forvalues row = 2/`n_items' {
        local col_sum "`col_sum' + know_cov_inv[`row',`col']"
    }
    local weightvar`col' = `col_sum'

    local item : word `col' of `know_items'
    local term "`weightvar`col''*`item'_z"
    if `col' == 1 {
        local index_expr "`term'"
    }
    else {
        local index_expr "`index_expr' + `term'"
    }
}
generate know_index = `weight'*(`index_expr')

/* Create indexes as in Anderson 2008: behavior with new children */

// Indicators are redefined so that 1 is the "good" outcome (e.g. no night
// blindness) before they are standardised. A missing source value yields 0.
generate nonightblindness = (youngest_nightblindness_e == 0)

generate threedays_onlybreastmilk = (youngest_3days_nonbreastmilk_e == 0)

// Step 1: standardise each behaviour among women with a new child (newkids == 1),
// using the mean and standard deviation of unflagged control-VDC women with a
// new child. z-scores are left missing for everyone else.
local behav_items "youngest_antenatal_care_e youngest_deworming_e youngest_vitaminA_e threedays_onlybreastmilk exclusive_breastfed_6mos first_breastfed_0hr youngest_irontab_5t6mos"
local n_behav : word count `behav_items'

local behav_z ""
foreach item of local behav_items {
    egen `item'_m_c = mean(`item') if control_vdc == 1 & newkids == 1 & flag == 0
    egen `item'_sd_c = sd(`item') if control_vdc == 1 & newkids == 1 & flag == 0
    egen `item'_m = max(`item'_m_c) if newkids == 1
    egen `item'_sd = max(`item'_sd_c) if newkids == 1
    generate `item'_z = (`item' - `item'_m) / `item'_sd if newkids == 1
    local behav_z "`behav_z' `item'_z"
}

// Blank out antenatal care for women without a new child. This runs after the
// z-scores above have been computed, so it does not affect them.
replace youngest_antenatal_care_e = . if newkids == .

// Step 2: covariance matrix of the z-scores, its inverse, and the scalar
// normaliser 1 / (1' * inv(S) * 1).
corrmat `behav_z', covmat(behav_cov_z)
matrix ones_z = J(`n_behav', 1, 1)
matrix weights_z = inv(ones_z' * inv(behav_cov_z) * ones_z)
local weight_z = weights_z[1,1]
matrix behav_cov_inv_z = inv(behav_cov_z)

// Step 3: the weight on item j is the sum of column j of the inverse covariance
// matrix. The weighted sum is assembled term by term in item order.
local index_expr_z ""
forvalues col = 1/`n_behav' {
    local col_sum "behav_cov_inv_z[1,`col']"
    forvalues row = 2/`n_behav' {
        local col_sum "`col_sum' + behav_cov_inv_z[`row',`col']"
    }
    local weightvarz`col' = `col_sum'

    local item : word `col' of `behav_items'
    local term "`weightvarz`col''*`item'_z"
    if `col' == 1 {
        local index_expr_z "`term'"
    }
    else {
        local index_expr_z "`index_expr_z' + `term'"
    }
}
generate behav_index_z = `weight_z'*(`index_expr_z')

// Number the women within each household (www hh). Ordering by idc inside the
// household makes wno reproducible from run to run; sorting on www hh alone
// leaves the within-household order, and hence wno, arbitrary.
// NOTE(review): assumes idc distinguishes women within a household (it is part
// of the key used to attach baseline records); ties on idc would still be
// numbered in arbitrary order.
bysort www hh (idc): gen wno = _n

label variable info_vdc "Info Only"
label variable cash_vdc "Info + Cash"

// Write the analysis dataset used by the subsequent regressions.
save "`folder'/Data/women_endline_reg_data", replace


